package com.bj.zspace.flink.study.wc;

import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.AggregateOperator;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.operators.FlatMapOperator;
import org.apache.flink.api.java.operators.UnsortedGrouping;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Arrays;
import java.util.regex.Pattern;

/**
 * Batch word count built on the Flink DataSet API.
 *
 * <p>Splits a small in-memory collection of text lines into words, sums the
 * occurrences of each distinct word, and prints the resulting
 * {@code (word, count)} pairs.
 *
 * @author qing
 */
public class BatchWordCount {

    private static final Logger log = LoggerFactory.getLogger(BatchWordCount.class);

    /** Word separator: one or more whitespace characters. Compiled once and reused for every line. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /**
     * Builds the word-count pipeline over the sample lines and prints the counts.
     *
     * @param args command-line arguments; not used
     * @throws Exception propagated from the Flink calls made while building or printing the result
     */
    public static void main(String[] args) throws Exception {
        // 1. Create the batch execution environment.
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // 2. Build the input from an in-memory collection of lines (no file is read here).
        DataSource<String> lineDataSource =
            env.fromCollection(Arrays.asList("hello world", "hello flink", "hello java"));

        // 3. Tokenize each line and emit one (word, 1) tuple per word.
        FlatMapOperator<String, Tuple2<String, Long>> wordAndOneTuple =
            lineDataSource.flatMap((String line, Collector<Tuple2<String, Long>> out) -> {
                for (String word : WHITESPACE.split(line)) {
                    // Leading whitespace (or an empty line) yields an empty token;
                    // skip it so that "" is never counted as a word.
                    if (!word.isEmpty()) {
                        out.collect(Tuple2.of(word, 1L));
                    }
                }
            // Explicit result type for the lambda, as in the original code.
            }).returns(Types.TUPLE(Types.STRING, Types.LONG));

        // 4. Group the tuples by the word (tuple field 0).
        UnsortedGrouping<Tuple2<String, Long>> wordAndOneGroup = wordAndOneTuple.groupBy(0);

        // 5. Within each group, sum the counts (tuple field 1).
        AggregateOperator<Tuple2<String, Long>> wordCounts = wordAndOneGroup.sum(1);

        // 6. Print the per-word totals.
        wordCounts.print();

        log.info("ending ...");
    }

}
